/**
 * Patched copy of the CssSelectorNodeFilter class from HTMLParser that fixes
 * bugs in the original implementation.
 * Bug ID: 2093824
 * Bug ID: 2344451
 */
// HTMLParser Library - A java-based parser for HTML
// http://htmlparser.org
// Copyright (C) 2006 Rogers George
//
// Revision Control Information
//
// $URL: https://htmlparser.svn.sourceforge.net/svnroot/htmlparser/tags/HTMLParserProject-2.1/parser/src/main/java/org/htmlparser/filters/CssSelectorNodeFilter.java $
// $Author: derrickoswald $
// $Date: 2006-09-23 02:26:16 +0200 (Sat, 23 Sep 2006) $
// $Revision: 11 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the Common Public License; either
// version 1.0 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// Common Public License for more details.
//
// You should have received a copy of the Common Public License
// along with this library; if not, the license is available from
// the Open Source Initiative (OSI) website:
//   http://opensource.org/licenses/cpl1.0.php

package org.tulkas.robot.patch;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Tag;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.HasParentFilter;
import org.htmlparser.filters.OrFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;

/**
 * A NodeFilter that accepts nodes based on whether they match a CSS2 selector.
 * Refer to <a href="http://www.w3.org/TR/REC-CSS2/selector.html">
 * http://www.w3.org/TR/REC-CSS2/selector.html</a> for syntax.
 * <p>
 * Handled syntax: type and universal (<code>*</code>) selectors,
 * <code>.class</code>, <code>#id</code>, the attribute tests
 * <code>[a]</code>, <code>[a=v]</code>, <code>[a~=v]</code> and
 * <code>[a|=v]</code>, the descendant (whitespace), child (<code>&gt;</code>)
 * and adjacent (<code>+</code>) combinators, and comma separated groups.
 * Pseudo-classes and malformed selectors are rejected with an
 * <code>IllegalArgumentException</code> when the filter is constructed.
 * <p>
 * Todo: more thorough testing, any relevant pseudo-classes, css3 features
 */
public class CssSelectorNodeFilter implements NodeFilter {
	/**
	 * Serialization version identifier.
	 */
	private static final long serialVersionUID = 4333025506206666478L;

	/**
	 * Regular expression to split the selector into tokens. Each capturing
	 * group corresponds to one of the token type constants below; the last
	 * group catches any character no other group recognizes.
	 */
	private static final Pattern tokens = Pattern.compile("(" + "/\\*.*?\\*/" // comments
			+ ") | (" + "   \".*?[^\"]\"" // double quoted string
			+ " | \'.*?[^\']\'" // single quoted string
			+ " | \"\" | \'\' " // empty quoted string
			+ ") | (" + " [\\~\\*\\$\\^\\|]? = " // attrib-val relations
			+ ") | (" + " [a-zA-Z_\\*](?:[a-zA-Z0-9_-]|\\\\.)* " // bare name
			+ ") | \\s*(" + " [+>~\\s] " // combinators
			+ ")\\s* | (" + " [\\.\\[\\]\\#\\:)(] " // class/id/attr/param delims
			+ ") | (" + " [\\,] " // comma
			+ ") | ( . )" // everything else (bogus)
	, Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.COMMENTS);

	/**
	 * Regular expression matching one escape sequence: a backslash followed
	 * by either two to six hexadecimal digits or any single character.
	 * Compiled once instead of on every call to {@link #unescape(String)}.
	 */
	private static final Pattern escapes = Pattern
			.compile("\\\\(?:([a-fA-F0-9]{2,6})|(.))");

	/**
	 * Comment token type.
	 */
	private static final int COMMENT = 1;

	/**
	 * quoted string token type.
	 */
	private static final int QUOTEDSTRING = 2;

	/**
	 * Relation token type.
	 */
	private static final int RELATION = 3;

	/**
	 * Name token type.
	 */
	private static final int NAME = 4;

	/**
	 * Combinator token type.
	 */
	private static final int COMBINATOR = 5;

	/**
	 * Delimiter token type.
	 */
	private static final int DELIM = 6;

	/**
	 * Comma token type.
	 */
	private static final int COMMA = 7;

	/**
	 * The filter built from the selector, or <code>null</code> when the
	 * selector contained nothing to match.
	 */
	private NodeFilter therule;

	/**
	 * Tokenizer state, only used while the constructor runs. Transient
	 * because java.util.regex.Matcher is not Serializable.
	 */
	private transient Matcher m = null;

	/**
	 * Type of the current token (capturing group index), 0 at end of input.
	 */
	private int tokentype = 0;

	/**
	 * Text of the current token, <code>null</code> at end of input.
	 */
	private String token = null;

	/**
	 * Create a Cascading Style Sheet node filter.
	 * @param selector The selector expression.
	 * @throws IllegalArgumentException if the selector is malformed or uses
	 * an unsupported feature.
	 */
	public CssSelectorNodeFilter(String selector) {
		m = tokens.matcher(selector);
		if (nextToken())
			therule = parse();
		// Parsing is finished; drop the tokenizer so it is not retained.
		m = null;
		token = null;
		tokentype = 0;
	}

	/**
	 * Accept nodes that match the selector expression.
	 * @param node The node to check.
	 * @return <code>true</code> if the node matches,
	 * <code>false</code> otherwise. A selector that produced no rule (for
	 * example an empty string) matches nothing.
	 */
	public boolean accept(Node node) {
		return (therule != null && therule.accept(node));
	}

	/**
	 * Advance to the next token, updating <code>token</code> and
	 * <code>tokentype</code>.
	 * @return <code>true</code> if a token was read, <code>false</code> at
	 * end of input.
	 */
	private boolean nextToken() {
		if (m != null && m.find())
			// Groups are numbered 1..groupCount() inclusive; the last one is
			// the catch-all for invalid characters and must be examined too,
			// otherwise such characters silently end the parse.
			for (int i = 1; i <= m.groupCount(); i++) {
				String text = m.group(i);
				if (null != text) {
					tokentype = i;
					token = text;
					return (true);
				}
			}
		tokentype = 0;
		token = null;

		return (false);
	}

	/**
	 * Combine two filters with AND, tolerating a missing operand.
	 * @param left The first filter, may be <code>null</code>.
	 * @param right The second filter, may be <code>null</code>.
	 * @return The non-null operand if only one is present, otherwise an
	 * AndFilter over both.
	 */
	private static NodeFilter and(NodeFilter left, NodeFilter right) {
		if (left == null)
			return (right);
		if (right == null)
			return (left);
		return (new AndFilter(left, right));
	}

	/**
	 * Parse a selector group starting at the current token and running to
	 * the end of the input.
	 * @return The filter for the group, or <code>null</code> if it held
	 * nothing but comments and whitespace.
	 */
	private NodeFilter parse() {
		NodeFilter ret = null;
		NodeFilter alternative;

		do {
			switch (tokentype) {
			case COMMENT:
			case NAME:
			case DELIM:
				// parseSimple() yields null for a lone comment.
				ret = and(ret, parseSimple());
				break;
			case COMBINATOR:
				ret = applyCombinator(ret);
				break;
			case COMMA:
				if (ret == null)
					throw new IllegalArgumentException("Syntax error at "
							+ token);
				nextToken();
				// The recursive call consumes the rest of the input.
				alternative = parse();
				if (alternative == null)
					throw new IllegalArgumentException(
							"Syntax error: selector expected after ','");
				ret = new OrFilter(ret, alternative);
				break;
			default:
				throw new IllegalArgumentException("invalid character '"
						+ token + "'");
			}
		} while (token != null);

		return (ret);
	}

	/**
	 * Consume the current combinator token and wrap the selector parsed so
	 * far accordingly. Whitespace at the start or end of a group, or directly
	 * before a comma, is insignificant and is skipped.
	 * @param left The filter for everything left of the combinator, may be
	 * <code>null</code> if nothing has been parsed yet.
	 * @return The filter the following simple selector is ANDed with.
	 */
	private NodeFilter applyCombinator(NodeFilter left) {
		String text = token;
		char combinator = text.charAt(0);

		nextToken();
		if (Character.isWhitespace(combinator)
				&& (left == null || token == null || tokentype == COMMA))
			return (left);
		if (left == null)
			throw new IllegalArgumentException("Syntax error at " + text);
		switch (combinator) {
		case '+':
			return (new AdjacentFilter(left));
		case '>':
			return (new HasParentFilter(left));
		default:
			// Whitespace. NOTE(review): '~' also lands here and is treated as
			// a descendant combinator, as it always has been.
			return (new HasAncestorFilter(left));
		}
	}

	/**
	 * Fail unless the current token is a name.
	 */
	private void requireName() {
		if (tokentype != NAME)
			throw new IllegalArgumentException("Syntax error at " + token);
	}

	/**
	 * Parse one simple selector: a run of names, class/id tests, attribute
	 * tests and comments with no combinator or comma in between.
	 * @return The filter for the simple selector, or <code>null</code> if
	 * only comments were consumed.
	 */
	private NodeFilter parseSimple() {
		boolean done = false;
		NodeFilter ret = null;

		while (!done && token != null) {
			switch (tokentype) {
			case COMMENT:
				nextToken();
				break;
			case NAME:
				if ("*".equals(token))
					ret = and(ret, new YesFilter());
				else
					ret = and(ret, new TagNameFilter(unescape(token)));
				nextToken();
				break;
			case DELIM:
				switch (token.charAt(0)) {
				case '.':
					nextToken();
					requireName();
					// CSS2 defines ".value" as "[class~=value]": the name
					// must be one of the space separated class words.
					ret = and(ret, new AttribMatchFilter("class",
							wordRegex(unescape(token))));
					break;
				case '#':
					nextToken();
					requireName();
					ret = and(ret, new HasAttributeFilter("id",
							unescape(token)));
					break;
				case ':':
					nextToken();
					ret = and(ret, parsePseudoClass());
					break;
				case '[':
					nextToken();
					// Returns with the closing ']' as the current token.
					ret = and(ret, parseAttributeExp());
					break;
				default:
					// A ']', '(' or ')' with nothing opening it.
					throw new IllegalArgumentException("Syntax error at "
							+ token);
				}
				// Step past the name or the closing bracket just handled.
				nextToken();
				break;
			default:
				done = true;
			}
		}

		return (ret);
	}

	/**
	 * Parse a pseudo-class. Not supported.
	 * @return Never returns normally.
	 * @throws IllegalArgumentException always.
	 */
	private NodeFilter parsePseudoClass() {
		throw new IllegalArgumentException("pseudoclasses not implemented yet");
	}

	/**
	 * Parse the inside of an attribute test; the current token is the one
	 * following the opening bracket. On return the current token is the
	 * closing bracket, which the caller consumes.
	 * @return The filter for the attribute test.
	 * @throws IllegalArgumentException if the expression is malformed, uses
	 * an unsupported relation, or is not closed by a bracket.
	 */
	private NodeFilter parseAttributeExp() {
		NodeFilter ret = null;

		if (tokentype == NAME) {
			String attrib = unescape(token);
			nextToken();
			if ("]".equals(token))
				// Bare [attrib]: already positioned on the closing bracket.
				ret = new HasAttributeFilter(attrib);
			else if (tokentype == RELATION) {
				String rel = token;
				String val = null;
				nextToken();
				if (tokentype == QUOTEDSTRING)
					val = unescape(token.substring(1, token.length() - 1));
				else if (tokentype == NAME)
					val = unescape(token);
				if (val != null) {
					if ("~=".equals(rel))
						ret = new AttribMatchFilter(attrib, wordRegex(val));
					else if ("|=".equals(rel))
						ret = new AttribMatchFilter(attrib, dashRegex(val));
					else if ("=".equals(rel))
						ret = new HasAttributeFilter(attrib, val);
					if (ret != null)
						nextToken(); // move from the value to the ']'
				}
			}
		}
		if (ret == null || !"]".equals(token))
			throw new IllegalArgumentException("Syntax error at " + token
					+ tokentype);

		return (ret);
	}

	/**
	 * Backslash-escape every character that is not an ASCII letter or digit
	 * so the text is matched literally inside a regular expression.
	 * @param text The literal text.
	 * @return The escaped text.
	 */
	private static String quoteRegex(String text) {
		return (text.replaceAll("([^a-zA-Z0-9])", "\\\\$1"));
	}

	/**
	 * Build the regular expression for the CSS2 <code>~=</code> relation:
	 * the value appears as one whole word of a whitespace separated list.
	 * @param val The word to look for.
	 * @return A regular expression to be used with <code>find()</code>.
	 */
	private static String wordRegex(String val) {
		// Lookarounds rather than \b so that words containing or ending in
		// non-word characters (e.g. "foo-bar") are delimited correctly.
		return ("(?<!\\S)" + quoteRegex(val) + "(?!\\S)");
	}

	/**
	 * Build the regular expression for the CSS2 <code>|=</code> relation:
	 * the attribute is exactly the value, or starts with the value
	 * immediately followed by a hyphen.
	 * @param val The value or prefix to look for.
	 * @return A regular expression to be used with <code>find()</code>.
	 */
	private static String dashRegex(String val) {
		return ("^" + quoteRegex(val) + "(?:\\-|$)");
	}

	/**
	 * Replace escape sequences in a string.
	 * @param escaped The string to examine.
	 * @return The argument with escape sequences replaced by their
	 * equivalent character.
	 */
	public static String unescape(String escaped) {
		StringBuffer result = new StringBuffer(escaped.length());
		Matcher matcher = escapes.matcher(escaped);

		while (matcher.find()) {
			// Copy the text before the escape with an empty replacement and
			// append the character directly. Passing it as the replacement
			// string would make '$' and '\' be treated as group references
			// and escapes, which throws for an escaped '$' or '\'.
			matcher.appendReplacement(result, "");
			if (null != matcher.group(1))
				result.append((char) Integer.parseInt(matcher.group(1), 16));
			else
				result.append(matcher.group(2));
		}
		matcher.appendTail(result);

		return (result.toString());
	}

	/**
	 * Accepts a node when any of its ancestors is accepted by the wrapped
	 * filter.
	 */
	private static class HasAncestorFilter implements NodeFilter {
		private static final long serialVersionUID = 2999717239254205103L;
		private NodeFilter atest;

		public HasAncestorFilter(NodeFilter n) {
			atest = n;
		}

		public boolean accept(Node n) {
			if (n == null)
				return (false);
			// Stop at the root: the wrapped filter is never handed null.
			for (Node ancestor = n.getParent(); ancestor != null; ancestor = ancestor
					.getParent())
				if (atest.accept(ancestor))
					return (true);
			return (false);
		}
	}

	/**
	 * Accepts a node when the closest preceding sibling that is a Tag is
	 * accepted by the wrapped filter. Non-Tag siblings in between, such as
	 * whitespace text, are skipped, as CSS2 adjacency ignores them.
	 */
	private static class AdjacentFilter implements NodeFilter {
		private static final long serialVersionUID = -3844228558043075366L;
		private NodeFilter sibtest;

		public AdjacentFilter(NodeFilter n) {
			sibtest = n;
		}

		public boolean accept(Node n) {
			if (n == null || n.getParent() == null)
				return (false);
			NodeList siblings = n.getParent().getChildren();
			if (siblings == null)
				return (false);
			Node previous = null; // last Tag seen before reaching n
			for (int i = 0; i < siblings.size(); i++) {
				Node sibling = siblings.elementAt(i);
				if (sibling == n)
					return (previous != null && sibtest.accept(previous));
				if (sibling instanceof Tag)
					previous = sibling;
			}
			return (false);
		}
	}

	/**
	 * Accepts every node; used for the universal selector.
	 */
	private static class YesFilter implements NodeFilter {
		private static final long serialVersionUID = 1397123515254237677L;

		public boolean accept(Node n) {
			return (true);
		}
	}

	/**
	 * Accepts tags that carry the given attribute with a value in which the
	 * regular expression is found.
	 */
	private static class AttribMatchFilter implements NodeFilter {
		private static final long serialVersionUID = -1340322873540508008L;
		private Pattern rel;
		private String attrib;

		public AttribMatchFilter(String attrib, String regex) {
			rel = Pattern.compile(regex);
			this.attrib = attrib;
		}

		public boolean accept(Node node) {
			if (!(node instanceof Tag))
				return (false);
			String value = ((Tag) node).getAttribute(attrib);
			if (value == null)
				return (false);
			return (rel == null || rel.matcher(value).find());
		}
	}
}
